package com.raisecom.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * Helpers for downloading HTML pages and, via {@link #main(String[])}, a small
 * demo that scrapes the parameter table of a phone from detail.zol.com.cn.
 */
public class HtmlParseUtil {

	/**
	 * Downloads the resource at {@code strUrl} and returns its body decoded
	 * with the given charset.
	 *
	 * <p>The body is returned verbatim, including its line terminators, so
	 * text that spans several source lines is not glued together.</p>
	 *
	 * @param strUrl        absolute HTTP(S) URL to fetch
	 * @param chartsetName  name of the charset used to decode the response
	 * @return the decoded response body
	 * @throws IOException if the URL is malformed, the charset is not
	 *                     supported, or the connection/read fails
	 */
	public static String getHtmlDoc(String strUrl, String chartsetName)throws IOException {
		StringBuilder html = new StringBuilder();
		HttpURLConnection conn = (HttpURLConnection) new URL(strUrl)
				.openConnection();
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new InputStreamReader(
					conn.getInputStream(), chartsetName));
			// Copy raw characters instead of using readLine(), which strips
			// line terminators and would merge adjacent lines.
			char[] buffer = new char[4096];
			int read;
			while ((read = reader.read(buffer)) != -1) {
				html.append(buffer, 0, read);
			}
			return html.toString();
		} finally {
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if close fails; the
					// disconnect below releases the connection anyway.
				}
			}
			// Also releases the underlying stream if the reader could not be
			// created (e.g. unsupported charset).
			conn.disconnect();
		}
	}

	/**
	 * Demo entry point: searches zol.com.cn for a fixed keyword, follows the
	 * first matching product link to its parameter page and prints every
	 * parameter name/value pair found there.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		String keyword = "Nokia+3100";
		String zolHost = "http://detail.zol.com.cn";
		Map<String, String> infoMap = new HashMap<String, String>();
		try {
			String searchPageHtml = getHtmlDoc(
					"http://detail.zol.com.cn/index.php?c=SearchList&keyword="
							+ keyword, ChartSet.GB2312);
			Document doc = Jsoup.parse(searchPageHtml);
			// Alternative selector that was tried previously:
			// "div.intro ul.series_list li a[href^=/cell_phone]"
			Elements els = doc.select("ol#result_list a[href^=/cell_phone]");
			if (els.isEmpty()) {
				// No search hit: nothing to do (same as the original behavior).
				return;
			}

			String productUrl = zolHost + els.first().attr("href");
			System.out.println("商品页url地址：" + productUrl);
			String productPageHtml = getHtmlDoc(productUrl, ChartSet.GB2312);
			doc = Jsoup.parse(productPageHtml);

			els = doc.select("ul.nav li a[href$=param.shtml]");
			if (els.isEmpty()) {
				// first() would return null here; bail out instead of an NPE.
				System.err.println("No parameter page link found on " + productUrl);
				return;
			}
			String paramUrl = zolHost + els.first().attr("href");
			System.out.println("手机详细参数url地址：" + paramUrl);
			String paramPageHtml = getHtmlDoc(paramUrl, ChartSet.GB2312);
			doc = Jsoup.parse(paramPageHtml);

			Elements modelLinks = doc
					.select("div.breadcrumb a[href^=/cell_phone/index]");
			if (!modelLinks.isEmpty()) {
				System.out.println("手机型号：" + modelLinks.first().ownText());
			}

			Elements els_params = doc.select("table.param_traditional tr");
			Elements els_title = els_params.select("th");
			Elements els_value = els_params
					.select("td div span[id^=oldPmVal]");
			// Titles and values are paired by position; stop at the shorter
			// list so a missing value span cannot cause an out-of-bounds read.
			int pairCount = Math.min(els_title.size(), els_value.size());
			for (int i = 0; i < pairCount; i++) {
				String title = els_title.get(i).text();
				String value = els_value.get(i).text();
				System.out.println(title + ":" + value);
				infoMap.put(title, value);
			}

		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}
